getwd()
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

## Packages
set.seed(717)
library(cubature)
library(lava) 
library(ggplot2)
library(mcGlobaloptim) 
library(DiceKriging) 
library(nloptr) 
library(MASS)
library(mcmc)
library(geoR)
library(RobustCalibration)
library(lhs)
library(RobustGaSP)
library(numDeriv)
library(ggplot2)
library(rTensor)
library(plotly)
library(R.matlab)
library(proxy)
library(abind)
library(scatterplot3d)
library(Matrix)
library(purrr)
library(plyr)
library(multiway)

# tempdir()
# dir.create(tempdir())
load("c2-setting.RData")

data <- readMat("data2.mat")$data

data.inx <- data[,1:3]
data.x <- data[,4:7]
data.y <- data[,8]

dim.f = c(5,4,4)
d = 4; lower.x = apply(data.x,2,min); upper.x = apply(data.x,2,max)

dim.s = c(2,2,2)
dim.h = prod(dim.s); dim.mode = length(dim.s)

k = 1; N = nrow(data.x)

h <- function(y) sum(y)
true.model<-function(y) sum(sort(y, decreasing = TRUE)[1:k])

y0 = array(NA, c(dim.f,N))
for(i in 1:N) y0[data.inx[i,1],data.inx[i,2],data.inx[i,3],i] = data.y[i]

ind.x.star = which.max(apply(y0,dim.mode+1,true.model))
h.star = apply(y0,dim.mode+1,true.model)[ind.x.star]
ind.star = arrayInd(order(y0[,,,ind.x.star], decreasing = TRUE)[1:k],dim.f)
ora.x.star = data.x[ind.x.star,]


################################################################################
#### GP ########################################################################
################################################################################
## Kernel
norm1 <- function(x1,x2) as.matrix(x1%*%t(x2))

mat0 <- function(x) matern(x, phi=1, kappa=5/2)
gau0 <- function(x) exp(-x^2)
exp0 <- function(x) exp(-abs(x))

ker.sele <- function(x1,x2,theta){
  x1 = matrix(x1,length(x1)/d,d); x2 = matrix(x2,length(x2)/d,d)
  n.s1 = nrow(x1); n.s2 = nrow(x2)
  theta1 = matrix(theta[3:(d+2)],n.s1,d,byrow=TRUE); theta2 = matrix(theta[3:(d+2)],n.s2,d,byrow=TRUE)
  R = theta[1]+theta[2]*norm1(x1-theta1,x2-theta2)
  return(R)
}

B.tuck.ml <- function(y,ind,n){
  y_0 = array(NA,dim=c(dim.f,n))
  
  for (i in 1:n) {
    for(j in 1:k){
      idx <- ind[[i]]
      y_0[idx[j,1],idx[j,2],idx[j,3],i] = y[j,i]
    }
  }
  
  tuck.re = multiway::tucker(y_0, nfac = c(dim.s,n), nstart = 1, Dfixed = diag(n))
  U.tuck = list(tuck.re$C,tuck.re$B,tuck.re$A)
  core.ten = tuck.re$G
  
  return(list(core.ten=core.ten, U.tuck=U.tuck))
}

pos <- function(ind) array(1:prod(dim.f),dim.f)[ind]

e.ind <- function(ind){
  e0 = pos(ind); ord = cbind(1:k,e0)
  e.re = matrix(0,k,prod(dim.f))
  e.re[ord] = 1
  return(e.re)
}



################################################################################
## Our proposed method: NS-TOGP
## Kernel
vec.lab = list()
for(om.lab in 1:dim.mode){
  vec.lab[[om.lab]] = dim.s[om.lab]*(dim.s[om.lab]+1)/2
}
vec.lab[[dim.mode+1]] = d+2
vec.lab[[dim.mode+2]] = vec.lab[[dim.mode+3]] = 1
group.lab <- unlist(Map(rep, LETTERS[1:length(vec.lab)], unlist(vec.lab)))
dim.hyper.ml = length(group.lab)

lower.th = c(unlist(Map(rep, c(rep(1e-3,dim.mode),1e-1,1e-2,1e-10), unlist(vec.lab))))
upper.th = c(unlist(Map(rep, c(rep(1,dim.mode),10,10,1e-2), unlist(vec.lab))))

sig.ml <- function(t,ome){
  O = matrix(0,t,t)
  O[lower.tri(O, diag = TRUE)] <- ome
  return(O)
}

ker.ml <- function(y,n,the){
  the0 = split(the, group.lab)
  
  Omega = list()
  for(i in 1:dim.mode) Omega[[i]] = list(dim.s[i],the0[[i]])
  
  sig = lapply(Omega,function(the) sig.ml(the[[1]],the[[2]]))
  sig.re = Map(function(B) B%*%t(B), sig)
  return(Reduce(kronecker,sig.re))
}


likeli.ml <- function(x1,x2,y,ind,n,the){
  the0 = split(the, group.lab)
  sig2 = the0[[dim.mode+2]]; tau2 = the0[[dim.mode+3]]
  
  omega = ker.ml(y,n,the)
  k.ini = kronecker(ker.sele(x1,x2,the0[[dim.mode+1]]),omega)
  
  e.ini = as.matrix(bdiag(lapply(ind,function(ind) e.ind(ind)%*%B.tuck)))
  par.tuck.k.y = e.ini%*%k.ini%*%t(e.ini)+tau2*diag(n*k)
  sol.par.tuck.k.y = ginv(par.tuck.k.y)
  
  log.likeli = determinant(par.tuck.k.y,logarithm=TRUE)$modulus+
    t(c(y))%*%sol.par.tuck.k.y%*%c(y)

  return(list(like=log.likeli, the0=the0))
}
# likeli.ml(x,x,y,n,s1,s2,c(runif(dim.hyper.mt)))


EIJ <- function(i,j,l){
  E0 = matrix(0,dim.s[l],dim.s[l]); E0[i,j] = 1
  return(E0)
}

der.l <- function(x1,x2,y,ind,n,the){
  the0 = split(the, group.lab)
  
  ome = list()
  for(i in 1:dim.mode){ome[[i]] = the0[[i]]}
  th = the0[[dim.mode+1]]; sig2 = the0[[dim.mode+2]]; tau2 = the0[[dim.mode+3]]
  
  J <- function(i,l){
    E0 = matrix(0,dim.s[l],dim.s[l]); E0[i,i] = exp(sig.ml(dim.s[l],ome[[l]])[i,i])
    return(E0)
  }
  
  Omega = list()
  for(i in 1:dim.mode) Omega[[i]] = list(dim.s[i],the0[[i]])
  
  sig = lapply(Omega,function(the) sig.ml(the[[1]],the[[2]]))
  sig.re = Map(function(B) B%*%t(B), sig)
  omega = Reduce(kronecker,sig.re)
  
  k.ini = ker.sele(x1,x2,th)
  
  e.ini = as.matrix(bdiag(lapply(ind,function(ind) e.ind(ind)%*%B.tuck)))
  al.k1 = e.ini%*%kronecker(k.ini,omega)%*%t(e.ini)
  
  par.k.y = sig2*al.k1+tau2*diag(n*k)
  sol.par.k.y = solve(par.k.y)
  
  al.k = sol.par.k.y%*%c(y)
  der.l.sig2 = tr(sol.par.k.y%*%al.k1)-t(al.k)%*%al.k1%*%al.k
  der.l.tau2 = tr(sol.par.k.y)-t(al.k)%*%al.k
  
  der.th = array(jacobian(function(theta) ker.sele(x1,x2,theta), th),dim=c(n,n,(d+2)))
  der.l.th.i <- function(der) sig2*(tr(sol.par.k.y%*%e.ini%*%kronecker(der,omega)%*%t(e.ini))-
                                      t(al.k)%*%e.ini%*%kronecker(der,omega)%*%t(e.ini)%*%al.k)
  der.l.th = apply(der.th,3,der.l.th.i)
  
  der.l.phi = list()
  for(l in 1:dim.mode){
    der.l.phi[[l]] = matrix(0, dim.s[l], dim.s[l])
    
    der.l.phi.ij <- function(i,j) EIJ(i,j,l)%*%t(sig[[l]])+sig[[l]]%*%EIJ(j,i,l)
    it1 <- function(der.p){
      list1 = if (l > 1) sig.re[1:(l-1)] else 1
      list2 = if (l < dim.mode) sig.re[(l+1):dim.mode] else 1
      kro.list = list(sig2*k.ini,Reduce(kronecker,list1),der.p,Reduce(kronecker,list2))
      return(Reduce(kronecker,kro.list))
    } 
    it2 <- function(der.p) tr(sol.par.k.y%*%e.ini%*%it1(der.p)%*%t(e.ini))-
      t(al.k)%*%e.ini%*%it1(der.p)%*%t(e.ini)%*%al.k
    
    der.l.phi.1 = sapply(c(1:dim.s[l]), function(i) {
      sapply(c(1:i), function(j) it2(der.l.phi.ij(i, j)))
    })
    
    der.l.phi.ii <- function(i) J(i,l)%*%t(sig[[l]])+sig[[l]]%*%J(i,l)
    der.l.phi.dig = apply(as.matrix(c(1:dim.s[l])),1, function(i) it2(der.l.phi.ii(i)))
    
    for (i in 1:dim.s[l]) {
      der.l.phi[[l]][i, 1:i] <- der.l.phi.1[[i]]
    }
    # diag(der.l.phi[[l]]) = der.l.phi.dig   
  }
  
  result = list(der.l.phi=lapply(der.l.phi, function(mat) mat[lower.tri(mat, diag = TRUE)]), 
                der.l.th=der.l.th, der.l.sig2=der.l.sig2, der.l.tau2=der.l.tau2)
  return(result)
}


mlgp.hat <- function(x.new,ind.new,x,y,ind,n,n.test,hy){
  x.new = matrix(x.new,n.test,d)
  omega = ker.ml(y,n,unlist(hy))
  
  sig2 = hy[[dim.mode+2]]; tau2 = hy[[dim.mode+3]]
  
  e.ini = as.matrix(bdiag(lapply(ind,function(ind) e.ind(ind)%*%B.tuck)))
  k.ml.s0 = ker.sele(x,x,hy[[dim.mode+1]])
  k.ml.s = sig2*e.ini%*%kronecker(k.ml.s0,omega)%*%t(e.ini)+tau2*diag(n*k)
  
  k.ml.10 = ker.sele(x.new,x,hy[[dim.mode+1]])
  k.ml.1 = sig2*kronecker(k.ml.10,omega)%*%t(e.ini)
  
  k.ml.00 = ker.sele(x.new,x.new,hy[[dim.mode+1]])
  k.ml.0 = sig2*kronecker(k.ml.00,omega)
  
  k.oth = k.ml.1%*%solve(k.ml.s)
  
  f.hat.tuck = k.oth%*%c(y)
  var.hat.tuck = k.ml.0-k.oth%*%t(k.ml.1)
  
  result = list(mean.tuck = f.hat.tuck, cov.tuck = var.hat.tuck)
  return(result)
}
# mlgp.hat(x.star,x,y,n,1,split(runif(dim.hyper.ml), group.lab))



################################################################################
## Our proposed method: NS-mlGP-UCB
n = 5; m = 20; J.for = 1

like.re.ml = hyper.ml = fhat = lapply(1:J.for, function(x) list())
x0.ml = y0.ml = ind.x.ml = ind0.ml = list()
mlgp.bo = h.ml = list()
regret.ml = ins.regret.ml = cum.regret.ml = list()
beta.ml = alpha.ml = ucb.new.ml = lapply(1:J.for, function(x) list())
tab.ind.ml = lapply(1:J.for, function(x) list())


# j.for.ml = 1
for(j.for.ml in 1:J.for){
  
  ind.x = c.ini.set$ind.x.for.new[[j.for.ml]]
  x = data.x[ind.x,]; y.or = y0[,,,ind.x]
  
  k.ind = lapply(1:n, function(i) matrix(data.inx[ind.x[i],],k,dim.mode))
  y.x = alply(y.or, .margins = dim.mode+1)
  y = matrix(sapply(1:n,function(i) 
    y.or[k.ind[[i]][1],k.ind[[i]][2],k.ind[[i]][3],i]),k,n)
  
  y.tuck = B.tuck.ml(y,k.ind,n); B.tuck = Reduce(kronecker,y.tuck$U.tuck)
  
  hyper.ml.old = directL(function(the) likeli.ml(x,x,y,k.ind,n,the)$like,
                          lower.th,upper.th,control=list(maxeval=1000))$par
  hyper.ml.new = optim(par = hyper.ml.old,
                        fn = function(the) likeli.ml(x,x,y,k.ind,n,the)$like,
                        gr = function(the) unlist(der.l(x,x,y,k.ind,n,the)),
                        method = "L-BFGS-B", lower=lower.th, upper=upper.th)$par
  
  like.re.ml[[j.for.ml]][[1]] = likeli.ml(x,x,y,k.ind,n,hyper.ml.new)
  hyper.ml[[j.for.ml]][[1]] = like.re.ml[[j.for.ml]][[1]]$the0
  
  x0 = data.x; inx0 = c(1:N)
  x0.ml[[j.for.ml]] = x; y0.ml[[j.for.ml]] = y; ind.x.ml[[j.for.ml]] = ind.x
  n.ml = n; ind0.ml[[j.for.ml]] = k.ind
  
  x.new.ml = t(as.matrix(x[which.max(apply(y,2,h)),]))
  y.new.ml = y[,which.max(apply(y,2,h))]
  ind.new.ml = list(k.ind[[which.max(apply(y,2,h))]])
  e.new = as.matrix(bdiag(lapply(ind.new.ml,function(ind) e.ind(ind)%*%B.tuck)))
  
  hyper.ml.ucb = unlist(hyper.ml[[j.for.ml]][[1]]); delta.ml = 0.05
  fhat[[j.for.ml]][[1]] = mlgp.hat(x.new.ml,ind.new.ml,x0.ml[[j.for.ml]],y0.ml[[j.for.ml]],
                                     ind0.ml[[j.for.ml]],n.ml,1,hyper.ml[[j.for.ml]][[1]])
  
  for(i.ml in 1:m){
    sig2 = hyper.ml[[j.for.ml]][[i.ml]][[dim.mode+3]]
    eta = hyper.ml[[j.for.ml]][[i.ml]][[dim.mode+3]]
    it1 = Map(function(A) determinant(diag(dim.h)+1/eta*A[[2]],logarithm=TRUE)$modulus,fhat[[j.for.ml]])
    beta.ml[[j.for.ml]][[i.ml]] = sqrt(Matrix::norm(e.new%*%fhat[[j.for.ml]][[1]]$mean.tuck,type="2"))+
      sqrt(sig2/eta)*sqrt(2*log(1/delta.ml)+Reduce(sum,it1))
    alpha.ml[[j.for.ml]][[i.ml]] = 2*log(dim.h*i.ml^2*m*6/(pi^2*delta.ml))
    
    ucb.ml <- function(x.new,beta){
      x.new = matrix(x.new,1,d)
      mlgp.output = mlgp.hat(x.new,ind.new.ml,x0.ml[[j.for.ml]],y0.ml[[j.for.ml]],ind0.ml[[j.for.ml]],
                               n.ml,n.test=1,hyper.ml[[j.for.ml]][[i.ml]])
      ucb = sum(e.new%*%mlgp.output$mean.tuck)+beta*sqrt(Matrix::norm(e.new%*%mlgp.output$cov.tuck%*%t(e.new),type="2"))
      return(ucb)
    }
    
    ucb.x = apply(x0[-ind.x.ml[[j.for.ml]],],1,function(x.new) -ucb.ml(x.new,beta.ml[[j.for.ml]][[i.ml]]))
    ind.x.new.ml = inx0[-ind.x.ml[[j.for.ml]]][which.min(ucb.x)] #which(apply(x0, 1, function(row) all(row == x0[-ind.x.ml[[j.for.ml]],][which.min(ucb.x),])))[1]
    x.new.ml = x0[ind.x.new.ml,]; y.new.ml.0 = y0[,,,ind.x.new.ml]
    
    ml.output = mlgp.hat(x.new.ml,ind.new.ml,x0.ml[[j.for.ml]],y0.ml[[j.for.ml]],ind0.ml[[j.for.ml]],n.ml,n.test=1,
                           hyper.ml[[j.for.ml]][[i.ml]])
    # ucb.new.ml.cmab = array(e.new%*%ml.output$mean.tuck + sqrt(alpha.ml[[j.for.ml]][[i.ml]])*sqrt(diag(e.new%*%ml.output$cov.tuck%*%t(e.new))), dim.f)
    # ind.new.ml = list(arrayInd(order(ucb.new.ml.cmab, decreasing = TRUE)[1:k],dim.f))
    ind.new.ml = list(t(as.matrix(data.inx[ind.x.new.ml,])))
    e.new = e.ind(ind.new.ml[[1]])%*%B.tuck
    y.new.ml = y.new.ml.0[ind.new.ml[[1]]]
    
    ucb.new.ml[[j.for.ml]][[i.ml]] = ucb.ml(x.new.ml,beta.ml[[j.for.ml]][[i.ml]])
    fhat[[j.for.ml]][[i.ml+1]] = mlgp.hat(x.new.ml,ind.new.ml,x0.ml[[j.for.ml]],y0.ml[[j.for.ml]],ind0.ml[[j.for.ml]],
                                             n.ml,n.test=1,hyper.ml[[j.for.ml]][[i.ml]])
    
    x0.ml[[j.for.ml]] = rbind(x0.ml[[j.for.ml]], x.new.ml)
    y0.ml[[j.for.ml]] = cbind(y0.ml[[j.for.ml]], y.new.ml)
    ind.x.ml[[j.for.ml]] = c(ind.x.ml[[j.for.ml]],ind.x.new.ml)
    
    ind0.ml[[j.for.ml]] = append(ind0.ml[[j.for.ml]], ind.new.ml)
    
    n.ml = n+i.ml
    
    if(i.ml %% 50 == 0){
      hyper.ml.ucb = optim(par = unlist(hyper.ml[[j.for.ml]][[i.ml]]), 
                            fn = function(the) likeli.ml(x0.ml[[j.for.ml]],x0.ml[[j.for.ml]],y0.ml[[j.for.ml]],ind0.ml[[j.for.ml]],n.ml,the)$like,
                            gr = function(the) unlist(der.l(x0.ml[[j.for.ml]],x0.ml[[j.for.ml]],y0.ml[[j.for.ml]],ind0.ml[[j.for.ml]],n.ml,the)), 
                            method = "L-BFGS-B", lower=lower.th, upper=upper.th)$par
    }else{
      hyper.ml.ucb = hyper.ml.ucb
    }
    
    hyper.ml.ucb = runif(dim.hyper.ml,lower.th,upper.th)
    like.re.ml[[j.for.ml]][[i.ml+1]] = likeli.ml(x0.ml[[j.for.ml]],x0.ml[[j.for.ml]],y0.ml[[j.for.ml]],ind0.ml[[j.for.ml]],n.ml,hyper.ml.ucb)
    hyper.ml[[j.for.ml]][[i.ml+1]] = like.re.ml[[j.for.ml]][[i.ml+1]]$the0
    print(i.ml)
  }
  mlgp.ucb <- function(x.new,ind.new) mlgp.hat(x.new,ind.new,x0.ml[[j.for.ml]],y0.ml[[j.for.ml]],ind0.ml[[j.for.ml]],
                                                 n.ml,n.test=1,hyper.ml[[j.for.ml]][[i.ml+1]]) 
  mlgp.bo[[j.for.ml]] = mlgp.ucb
  
  h.ml[[j.for.ml]] = apply(y0.ml[[j.for.ml]],2,h)
  regret.ml[[j.for.ml]] = h.star-h.ml[[j.for.ml]]
  
  layout(matrix(1, nrow = 1, ncol = 1))
  plot(rep(h.star,(n.ml-n+1)),type="b",lwd=3,lty=1,pch=1,col=1,ylim=c(cummax(h.ml[[j.for.ml]])[n],h.star))
  lines(cummax(h.ml[[j.for.ml]])[n:n.ml],type="b",lwd=3,lty=2,pch=2,col=2)
  
  ins.regret.ml[[j.for.ml]] = h.star-cummax(h.ml[[j.for.ml]][(n+1):n.ml])
  cum.regret.ml[[j.for.ml]] = cumsum(ins.regret.ml[[j.for.ml]])
  
  plot(cumsum(ins.regret.ml[[j.for.ml]]),type="b",lwd=3,lty=2,pch=2,col=2)
  
  
  tab.ind.ml[[j.for.ml]] = unlist(lapply(ind0.ml[[j.for.ml]],
                                           function(a) sum(apply(a, 1, function(row) any(duplicated(rbind(row, ind.star)))))))
  tab.ind.ml[[j.for.ml]][which.max(h.ml[[j.for.ml]])]
  print(j.for.ml)
}

  
  
  
mat <- do.call(rbind, cum.regret.ml)
cum.regret.mean <- apply(mat,2, mean)
cum.regret.lower <- apply(mat,2, function(x) quantile(x, 0.025))
cum.regret.upper <- apply(mat,2, function(x) quantile(x, 0.975))


time <- 1:m
plot(time, cum.regret.mean, type = "l", lwd = 2, col = "blue", ylim = range(c(cum.regret.lower, cum.regret.upper)),
     ylab = "Value", xlab = "Time", main = "Mean with 95% Confidence Band")
polygon(c(time, rev(time)),
        c(cum.regret.upper, rev(cum.regret.lower)),
        col = rgb(0.1, 0.2, 0.9, 0.2), border = NA)
lines(time, cum.regret.mean, col = "blue", lwd = 2)


pml.ucb.list = list(ind.x.star=ind.x.star, h.star=h.star, ind.star=ind.star, ora.x.star=ora.x.star,         
                     like.re.ml=like.re.ml, hyper.ml=hyper.ml, fhat=fhat,
                     x0.ml=x0.ml, y0.ml=y0.ml, mlgp.bo=mlgp.bo, 
                     h.ml=h.ml, regret.ml=regret.ml, ins.regret.ml=ins.regret.ml, cum.regret.ml=cum.regret.ml,
                     ind.x.ml=ind.x.ml, ind0.ml=ind0.ml, tab.ind.ml=tab.ind.ml,
                     beta.ml=beta.ml, alpha.ml=alpha.ml, ucb.new.ml=ucb.new.ml)
save(pml.ucb.list, file="c2.pml.ucb.list.RData")


  
  
  